Load libraries

rm(list=ls())
# library("BiocManager")
# library("devtools")

library("limma")
library("igraph")
library("DT")
library("ggplot2")

# Load gene expression, study design and survival data
load(url("https://raw.githubusercontent.com/babelomics/metabolizer/develop/metabolizer.git/script/analysis/tcga_brca_example.RData"))

# gex[1:3,1:3]
# head(designFile, 2)
# tail(designFile, 2)
# survData[1:3,1:10]

Metabolizer

http://metabolizer.babelomics.org/
Differential metabolic activity and discovery of therapeutic targets using summarized metabolic pathway models.
Gene Expression Integration into Pathway Modules Reveals a Pan-Cancer Metabolic Landscape

Differential Activity

# Load source code and module files of the Metabolizer
source("https://raw.githubusercontent.com/babelomics/metabolizer/develop/metabolizer.git/script/utils/functions.main.14032019.R")
load(url("https://raw.githubusercontent.com/babelomics/metabolizer/develop/metabolizer.git/script/analysis/hsa_module_data_March2019.RData"))


# Run metabolizer
results <- metabolizer(hsa_module_data, 
                       combat.vals=gex, 
                       output_folder=NULL, saveName=NULL, 
                       onesample=F, 
                       expbased=T, fluxbased=F, 
                       default_value=0.5, 
                       moduleinfo=url("https://github.com/babelomics/metabolizer/raw/develop/metabolizer.git/script/analysis/hsa_moduleinfo.RData"))

Module activity calculations are done successfully number of the reactions with missing values: 60

## Warning for M00067_C20825: all the reaction node values are 0.5
## The citation for the Metabolizer tool: 
## -> https://www.nature.com/articles/s41540-019-0087-2 
## -> doi: 10.1038/s41540-019-0087-2
# Get module activities and module annotations
moduleActivities <- results$moduleActivities[ , colnames(gex)]
moduleAnnotation <- results$moduleActivities[ , which(!colnames(results$moduleActivities) %in% colnames(gex))]
# Discard modules with zero variance 
idxKeep <- which(apply(moduleActivities, 1, var)!=0)
moduleActivities <- moduleActivities[idxKeep,]

# moduleAnnotation[1:3,]
# moduleActivities[1:3,1:5]

# Fit linear model for each module
fit <- lmFit(moduleActivities, design)
# Given the linear model fit, compute statistics (p-value, fold-change ... etc)
fit <- eBayes(fit)
# Extract a table of the top-ranked genes from a linear model fit.
top <- topTable(fit,coef="TissueTumor", p.value = 0.05, sort.by = "P", adjust.method = "fdr", number = nrow(moduleActivities))
# add module annotations to toptable
idx <- match(rownames(top), rownames(results$moduleActivities))
top <- cbind(moduleAnnotation[idx, c("Module.ID", "Main.metabolic.category", "Sub.metabolic.category", "Process.description", "StartMetaboliteIDs", "StartMetaboliteNames", "EndMetaboliteIDs", "EndMetaboliteNames", "Link.to.module")], top)

top %>% datatable(extensions = list('Buttons'="NULL", 'ColReorder'="NULL"),
            options = list(pageLength = 5, scrollX = TRUE, rownames = F, width = 800, 
                           colReorder = list(realtime = TRUE), filter = "top", dom = 'Blfrtip', 
                            buttons = list('copy', 'print',
                                           list(extend = 'collection',
                                        buttons = c('csv', 'excel', 'pdf'),
                                        text = 'Download'
                ), I('colvis')),  
                           #buttons = c('copy', 'csv', 'excel', 'pdf', 'print'),
                 lengthMenu = list(c(10,25,50,-1), c(10,25,50,"All"))
                ))

Survival

library(survival)

for(m in rownames(moduleActivities)){

  # select module activites for tumor samples and dichotomize continuous variables  
  module_act <- moduleActivities[m, survData$V1[survData$V2=="Tumor"]]
  quants <- quantile(module_act,probs = c(0.1,0.45,0.5,0.55,0.9))
  
  # we used median value as threshold to generate binary variables
  module_act <- as.character(ifelse(module_act < as.numeric(quants["50%"]), "DOWN", "UP"))
  
  if(length(unique(module_act))==1){ next }
  
 # create survival object 
  surv <- Surv(survData$Time, survData$Status)
 # fit survival curves module activity
  sfit <- survfit(surv ~ module_act)
  
 # check if the difference between two curves is significant (p < 0.05)
  my_diff <- survdiff(surv ~ module_act)
  diff_pval <- 1 - pchisq(my_diff$chisq, length(my_diff$n) - 1)
  
  
  if(diff_pval < 0.05){
    # print(paste(m, "| p-value: ",round(diff_pval, 4)))
    modulename <- moduleAnnotation$Process.description[moduleAnnotation$Module.ID==m]
    
    plot(sfit, col = c("blue","red"),  mark.time = T, main = paste0(m,"\n",modulename, "\n p-value ", round(diff_pval, 4)), cex.main= 0.7)
    legend ("bottomleft", legend=c("Down","Up"), col=c("blue", "red"), lty=1:3)
    
  }
}

Correlation

library(corrplot)

# load this function: cor.mtest
cor.mtest <- function(mat, ...) {
  mat <- as.matrix(mat)
  n <- ncol(mat)
  p.mat<- matrix(NA, n, n)
  diag(p.mat) <- 0
  for (i in 1:(n - 1)) {
    for (j in (i + 1):n) {
      tmp <- cor.test(mat[, i], mat[, j], ...)
      p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
    }
  }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}


p.mat <- t(sigModules)
M <- cor(p.mat, method = "pearson")
M_tumor <- cor(p.mat[designFile$V2=="Tumor",], method = "pearson")
M_normal <- cor(p.mat[designFile$V2=="Normal",], method = "pearson")
# Correlation of all samples
corrplot(M, type="upper", order="original", sig.level = 0.01, p.mat = cor.mtest(p.mat), title = "All samples", mar = c(0,0,1,0), tl.cex=1, pch.cex=0.8, pch.col = "black")

# Correlation of only normal samples
corrplot(M_normal, type="upper", order="original", sig.level = 0.01, p.mat = cor.mtest(p.mat[designFile$V2=="Normal",]), title = "Normal samples", mar = c(0,0,1,0), tl.cex=1, pch.cex=0.8, pch.col = "black")

# Correlation of only tumor samples
corrplot(M_tumor, type="upper", order="original", sig.level = 0.01, p.mat = cor.mtest(p.mat[designFile$V2=="Tumor",]), title = "Tumor samples", mar = c(0,0,1,0), tl.cex=1, pch.cex=0.8, pch.col = "black")

PCA

# Run PCA
pc <- prcomp(t(sigModules), scale. = T)
# Calculate variance explained by principal component
varExp <- round(pc$sdev^2/sum(pc$sdev^2),digits = 2)
# PCA Plot
pc_df <- as.data.frame(pc$x)
pc_df$group <- as.factor(designFile$V2)
p<-ggplot(pc_df,aes(x=PC1,y=PC2,color=group))
p<-p+geom_point(size=4, alpha = 0.7)
p <- p + theme_bw() + guides(colour = guide_legend(override.aes = list(size=6)))
p <- p + theme(strip.text.x = element_text(face = "italic"), 
               axis.title.x = element_text(size=14, face="bold"),
               axis.title.y = element_text(size=14, face="bold"),
               legend.title = element_text(size=14, face="bold"),
               legend.text = element_text(size=14, face="bold"))
p <- p + xlab(paste0("PC1 variance %", varExp[1]))
p <- p + ylab(paste0("PC2 variance %", varExp[2]))
p <- p + labs(col="Tissue") + theme(plot.margin = unit(c(1,1,1,1), "cm"))
p

# Visualize the strength of associations between the principal components of an omic data matrix
pc <- prcomp(sigModules, scale. = F)
var_cos2 <- pc$x * pc$x
corrplot(var_cos2[,1:6], is.corr=FALSE)